<!DOCTYPE html>



  


<html class="theme-next gemini use-motion" lang="zh-CN">
<head>
  <meta charset="UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
<meta name="theme-color" content="#222">









<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />















  
  
  <link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css" />




  
  
  
  

  
    
    
  

  
    
      
    

    
  

  
    
      
    

    
  

  
    
      
    

    
  

  
    
      
    

    
  

  
    
    
    <link href="//fonts.googleapis.com/css?family=Microsoft YaHei:300,300italic,400,400italic,700,700italic|Microsoft YaHei:300,300italic,400,400italic,700,700italic|Microsoft YaHei:300,300italic,400,400italic,700,700italic|Microsoft YaHei:300,300italic,400,400italic,700,700italic|Inziu Iosevka Slab SC:300,300italic,400,400italic,700,700italic&subset=latin,latin-ext" rel="stylesheet" type="text/css">
  






<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css" />

<link href="/css/main.css?v=5.1.2" rel="stylesheet" type="text/css" />


  <meta name="keywords" content="AdaBoost," />








  <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico?v=5.1.2" />






<meta name="description" content="adaptive boosting 的主要思想是，基于统一数据集训练出多个分类器，每个分类器擅长在不同的子数据集上得出正确的分类，最后在预测是每个分类器都得出一个预测值，综合得出最终预测值。">
<meta name="keywords" content="AdaBoost">
<meta property="og:type" content="article">
<meta property="og:title" content="The AdaBoost Meta-Algorithm">
<meta property="og:url" content="http://idmk.oschina.io/2017/08/26/the-AdaBoost-meta-algorithm/index.html">
<meta property="og:site_name" content="苦舟">
<meta property="og:description" content="adaptive boosting 的主要思想是，基于统一数据集训练出多个分类器，每个分类器擅长在不同的子数据集上得出正确的分类，最后在预测是每个分类器都得出一个预测值，综合得出最终预测值。">
<meta property="og:locale" content="zh-CN">
<meta property="og:image" content="http://idmk.oschina.io/2017/08/26/the-AdaBoost-meta-algorithm/markdown-img-paste-20170826211832185.png">
<meta property="og:updated_time" content="2017-08-26T12:53:48.000Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="The AdaBoost Meta-Algorithm">
<meta name="twitter:description" content="adaptive boosting 的主要思想是，基于统一数据集训练出多个分类器，每个分类器擅长在不同的子数据集上得出正确的分类，最后在预测是每个分类器都得出一个预测值，综合得出最终预测值。">
<meta name="twitter:image" content="http://idmk.oschina.io/2017/08/26/the-AdaBoost-meta-algorithm/markdown-img-paste-20170826211832185.png">



<script type="text/javascript" id="hexo.configurations">
  var NexT = window.NexT || {};
  var CONFIG = {
    root: '/',
    scheme: 'Gemini',
    sidebar: {"position":"left","display":"hide","offset":12,"offset_float":12,"b2t":false,"scrollpercent":false,"onmobile":false},
    fancybox: true,
    tabs: true,
    motion: true,
    duoshuo: {
      userId: '0',
      author: '博主'
    },
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: {"per_page":10},
      labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
    }
  };
</script>



  <link rel="canonical" href="http://idmk.oschina.io/2017/08/26/the-AdaBoost-meta-algorithm/"/>





  <title>The AdaBoost Meta-Algorithm | 苦舟</title>
  














</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-CN">

  
  
    
  

  <div class="container sidebar-position-left page-post-detail ">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/"  class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">苦舟</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
      
        <p class="site-subtitle">学海无涯，吾将上下求索。</p>
      
  </div>

  <div class="site-nav-toggle">
    <button>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>

<nav class="site-nav">
  

  
    <ul id="menu" class="menu">
      
        
        <li class="menu-item menu-item-home">
          <a href="/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-home"></i> <br />
            
            首页
          </a>
        </li>
      
        
        <li class="menu-item menu-item-categories">
          <a href="/categories/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-th"></i> <br />
            
            分类
          </a>
        </li>
      
        
        <li class="menu-item menu-item-about">
          <a href="/about/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-user"></i> <br />
            
            关于
          </a>
        </li>
      
        
        <li class="menu-item menu-item-archives">
          <a href="/archives/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
            
            归档
          </a>
        </li>
      
        
        <li class="menu-item menu-item-tags">
          <a href="/tags/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
            
            标签
          </a>
        </li>
      
        
        <li class="menu-item menu-item-commonweal">
          <a href="/404.html" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-heartbeat"></i> <br />
            
            公益404
          </a>
        </li>
      

      
        <li class="menu-item menu-item-search">
          
            <a href="javascript:;" class="popup-trigger">
          
            
              <i class="menu-item-icon fa fa-search fa-fw"></i> <br />
            
            搜索
          </a>
        </li>
      
    </ul>
  

  
    <div class="site-search">
      
  <div class="popup search-popup local-search-popup">
  <div class="local-search-header clearfix">
    <span class="search-icon">
      <i class="fa fa-search"></i>
    </span>
    <span class="popup-btn-close">
      <i class="fa fa-times-circle"></i>
    </span>
    <div class="local-search-input-wrapper">
      <input autocomplete="off"
             placeholder="搜索..." spellcheck="false"
             type="text" id="local-search-input">
    </div>
  </div>
  <div id="local-search-result"></div>
</div>



    </div>
  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://idmk.oschina.io/2017/08/26/the-AdaBoost-meta-algorithm/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="东木金">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/uploads/avatar.jpg">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="苦舟">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">The AdaBoost Meta-Algorithm</h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2017-08-26T20:53:48+08:00">
                2017-08-26
              </time>
            

            

            
          </span>

          
            <span class="post-category" >
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/categories/ML/" itemprop="url" rel="index">
                    <span itemprop="name">ML</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
          

          
          

          

          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        <p>adaptive boosting 的主要思想是，基于统一数据集训练出多个分类器，每个分类器擅长在不同的子数据集上得出正确的分类，最后在预测是每个分类器都得出一个预测值，综合得出最终预测值。<br><a id="more"></a><br>AdaBoost is short for adaptive boosting. AdaBoost works this way: A weight is appliedto every example in the training data. We ’ ll call the weight vector D. Initially, these weights are all equal.<br>A weak classifier is first trained on the training data. The errors from the weak classifier are calculated, and the weak classifier is trained a second time with the same dataset.<br>This second time the weak classifier is trained, the weights of the training set are adjusted so the examples properly classified the first time are weighted less and the examples incorrectly classified in the first iteration are weighted more.<br>To get one answer from all of these weak classifiers, AdaBoost assigns alpha values to each of the classifiers. The alpha values are based on the error of each weak classifier.</p>
<p>The error Epsilon is given by<br>$$ \varepsilon = \dfrac{number\ of\ incorrectly\ classified\ examples}{total\ number\ of\ examples} $$<br>The alpha is given by<br>$$ \alpha = \frac{1}{2} \ln \left( \dfrac{1 - \varepsilon}{ \varepsilon } \right) $$</p>

<p>Schematic representation of AdaBoost; with the dataset on the left side, the different widths of the bars represent weights applied to each instance. The weighted predictions pass through a classifier, which is then weighted by the triangles (a values). The weighted output of each triangle is summed up in the circle, which produces the final output.</p>
<p>After you calculate alpha, you can update the weight vector D so that the examples that are correctly classified will decrease in weight and the misclassified examples will increase in weight. D is given by</p>
<img src="/2017/08/26/the-AdaBoost-meta-algorithm/markdown-img-paste-20170826211832185.png" alt="markdown-img-paste-20170826211832185.png" title="">
<p>Set the minError to +¥ For every feature in the dataset:      For every step:        For each inequality:          Build a decision stump and test it with the weighted dataset          If the error is less than minError: set this stump as the best stumpReturn the best stump</p>
<h2 id="代码实现及测试"><a href="#代码实现及测试" class="headerlink" title="代码实现及测试"></a>代码实现及测试</h2><figure class="highlight python"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line"><span class="keyword">import</span> numpy <span class="keyword">as</span> np</div></pre></td></tr></table></figure>
<figure class="highlight python"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div><div class="line">24</div><div class="line">25</div><div class="line">26</div><div class="line">27</div><div class="line">28</div><div class="line">29</div><div class="line">30</div><div class="line">31</div><div class="line">32</div><div class="line">33</div><div class="line">34</div><div class="line">35</div><div class="line">36</div><div class="line">37</div><div class="line">38</div><div class="line">39</div><div class="line">40</div><div class="line">41</div><div class="line">42</div><div class="line">43</div><div class="line">44</div><div class="line">45</div><div class="line">46</div><div class="line">47</div><div class="line">48</div><div class="line">49</div><div class="line">50</div><div class="line">51</div><div class="line">52</div><div class="line">53</div><div class="line">54</div><div class="line">55</div><div class="line">56</div></pre></td><td class="code"><pre><div class="line"><span class="function"><span class="keyword">def</span> <span class="title">load_dataset</span><span class="params">()</span>:</span></div><div class="line">    X = np.mat([</div><div class="line">        [<span class="number">1</span>, <span class="number">2.1</span>],</div><div class="line">        [<span class="number">2</span>, <span class="number">1.1</span>],</div><div class="line">        [<span class="number">1.3</span>, <span class="number">1</span>],</div><div class="line">        [<span class="number">1</span>, <span class="number">1</span>],</div><div class="line">        [<span class="number">2</span>, <span class="number">1</span>]</div><div class="line">    ])</div><div class="line">    y = [<span class="number">1.0</span>, <span class="number">1.0</span>, <span class="number">-1.0</span>, <span class="number">-1.0</span>, <span class="number">1.0</span>]</div><div class="line"></div><div class="line">    <span class="keyword">return</span> X, y</div><div class="line"></div><div class="line"></div><div class="line"><span class="function"><span class="keyword">def</span> <span class="title">stump_classify</span><span class="params">(X, dim, thre, cmp_oper)</span>:</span></div><div class="line">    <span class="string">""" 根据传入的阈值，将在阈值一边的数据分到类别 -1，而在另一边的数据分到类别 +1"""</span></div><div class="line">    ret_arr = np.ones((X.shape[<span class="number">0</span>], <span class="number">1</span>))</div><div class="line">    <span class="keyword">if</span> cmp_oper == <span class="string">'lt'</span>:</div><div class="line">        ret_arr[X[:, dim] &lt;= thre] = <span class="number">-1.0</span></div><div class="line">    <span class="keyword">else</span>:</div><div class="line">        ret_arr[X[:, dim] &gt; thre] = <span class="number">-1.0</span></div><div class="line">    <span class="keyword">return</span> ret_arr</div><div class="line"></div><div class="line"><span class="function"><span class="keyword">def</span> <span class="title">build_stump</span><span class="params">(X, y, w)</span>:</span></div><div class="line">    <span class="string">""" 输入为权重向量 w ，返回的则是利用 w 而得到的具有最小错误率的单层决策树 """</span></div><div class="line">    x_mat = np.mat(X)</div><div class="line">    y_mat = np.mat(y).T</div><div class="line">    m, n = X.shape</div><div class="line">    n_steps = <span class="number">10</span></div><div class="line">    beststump = &#123;&#125;</div><div class="line">    best_hypo = np.mat(np.zeros((m, <span class="number">1</span>)))</div><div class="line">    min_error = np.inf</div><div class="line">    <span class="comment"># 遍历所有维度的特征</span></div><div class="line">    <span class="keyword">for</span> i <span class="keyword">in</span> range(n):</div><div class="line">        <span class="comment"># 遍历特征的取值范围，遍历范围应该大于特征的取值范围</span></div><div class="line">        range_min = x_mat[:, i].min()</div><div class="line">        range_max = x_mat[:, i].max()</div><div class="line">        step_size = (range_max - range_min) / n_steps</div><div class="line">        thre = range_min - <span class="number">2</span> * step_size</div><div class="line">        <span class="keyword">for</span> j <span class="keyword">in</span> range(<span class="number">-1</span>, int(n_steps) + <span class="number">1</span>):</div><div class="line">            thre += step_size</div><div class="line">            <span class="comment"># 切换取值范围</span></div><div class="line">            <span class="keyword">for</span> cmp_oper <span class="keyword">in</span> [<span class="string">'lt'</span>, <span class="string">'gt'</span>]:</div><div class="line">                hypo = stump_classify(X, i, thre, cmp_oper)</div><div class="line">                err_arr = np.mat(np.ones((m, <span class="number">1</span>)))</div><div class="line">                err_arr[hypo == y_mat] = <span class="number">0</span></div><div class="line">                weighted_error = w.T * err_arr</div><div class="line">                print(<span class="string">"split: dim %d, thresh %.2f, thresh ineqal: %s, the weighted error is %.3f"</span>\</div><div class="line">                 % (i, thre, cmp_oper, weighted_error))</div><div class="line">                <span class="keyword">if</span> weighted_error &lt; min_error:</div><div class="line">                    min_error = weighted_error</div><div class="line">                    best_hypo = hypo.copy()</div><div class="line">                    beststump[<span class="string">'dim'</span>] = i</div><div class="line">                    beststump[<span class="string">'thre'</span>] = thre</div><div class="line">                    beststump[<span class="string">'com_oper'</span>] = cmp_oper</div><div class="line"></div><div class="line">    <span class="keyword">return</span> beststump, min_error, best_hypo</div></pre></td></tr></table></figure>
<figure class="highlight python"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div></pre></td><td class="code"><pre><div class="line">X, y = load_dataset()</div><div class="line">w = np.mat(np.ones((<span class="number">5</span>, <span class="number">1</span>)) / <span class="number">5</span>)</div><div class="line">build_stump(X, y, w)</div></pre></td></tr></table></figure>
<pre><code>split: dim 0, thresh 0.90, thresh ineqal: lt, the weighted error is 0.400
split: dim 0, thresh 0.90, thresh ineqal: gt, the weighted error is 0.600
split: dim 0, thresh 1.00, thresh ineqal: lt, the weighted error is 0.400
split: dim 0, thresh 1.00, thresh ineqal: gt, the weighted error is 0.600
split: dim 0, thresh 1.10, thresh ineqal: lt, the weighted error is 0.400
split: dim 0, thresh 1.10, thresh ineqal: gt, the weighted error is 0.600
split: dim 0, thresh 1.20, thresh ineqal: lt, the weighted error is 0.400
split: dim 0, thresh 1.20, thresh ineqal: gt, the weighted error is 0.600
split: dim 0, thresh 1.30, thresh ineqal: lt, the weighted error is 0.200
split: dim 0, thresh 1.30, thresh ineqal: gt, the weighted error is 0.800
split: dim 0, thresh 1.40, thresh ineqal: lt, the weighted error is 0.200
split: dim 0, thresh 1.40, thresh ineqal: gt, the weighted error is 0.800
split: dim 0, thresh 1.50, thresh ineqal: lt, the weighted error is 0.200
split: dim 0, thresh 1.50, thresh ineqal: gt, the weighted error is 0.800
split: dim 0, thresh 1.60, thresh ineqal: lt, the weighted error is 0.200
split: dim 0, thresh 1.60, thresh ineqal: gt, the weighted error is 0.800
split: dim 0, thresh 1.70, thresh ineqal: lt, the weighted error is 0.200
split: dim 0, thresh 1.70, thresh ineqal: gt, the weighted error is 0.800
split: dim 0, thresh 1.80, thresh ineqal: lt, the weighted error is 0.200
split: dim 0, thresh 1.80, thresh ineqal: gt, the weighted error is 0.800
split: dim 0, thresh 1.90, thresh ineqal: lt, the weighted error is 0.200
split: dim 0, thresh 1.90, thresh ineqal: gt, the weighted error is 0.800
split: dim 0, thresh 2.00, thresh ineqal: lt, the weighted error is 0.600
split: dim 0, thresh 2.00, thresh ineqal: gt, the weighted error is 0.400
split: dim 1, thresh 0.89, thresh ineqal: lt, the weighted error is 0.400
split: dim 1, thresh 0.89, thresh ineqal: gt, the weighted error is 0.600
split: dim 1, thresh 1.00, thresh ineqal: lt, the weighted error is 0.200
split: dim 1, thresh 1.00, thresh ineqal: gt, the weighted error is 0.800
split: dim 1, thresh 1.11, thresh ineqal: lt, the weighted error is 0.400
split: dim 1, thresh 1.11, thresh ineqal: gt, the weighted error is 0.600
split: dim 1, thresh 1.22, thresh ineqal: lt, the weighted error is 0.400
split: dim 1, thresh 1.22, thresh ineqal: gt, the weighted error is 0.600
split: dim 1, thresh 1.33, thresh ineqal: lt, the weighted error is 0.400
split: dim 1, thresh 1.33, thresh ineqal: gt, the weighted error is 0.600
split: dim 1, thresh 1.44, thresh ineqal: lt, the weighted error is 0.400
split: dim 1, thresh 1.44, thresh ineqal: gt, the weighted error is 0.600
split: dim 1, thresh 1.55, thresh ineqal: lt, the weighted error is 0.400
split: dim 1, thresh 1.55, thresh ineqal: gt, the weighted error is 0.600
split: dim 1, thresh 1.66, thresh ineqal: lt, the weighted error is 0.400
split: dim 1, thresh 1.66, thresh ineqal: gt, the weighted error is 0.600
split: dim 1, thresh 1.77, thresh ineqal: lt, the weighted error is 0.400
split: dim 1, thresh 1.77, thresh ineqal: gt, the weighted error is 0.600
split: dim 1, thresh 1.88, thresh ineqal: lt, the weighted error is 0.400
split: dim 1, thresh 1.88, thresh ineqal: gt, the weighted error is 0.600
split: dim 1, thresh 1.99, thresh ineqal: lt, the weighted error is 0.400
split: dim 1, thresh 1.99, thresh ineqal: gt, the weighted error is 0.600
split: dim 1, thresh 2.10, thresh ineqal: lt, the weighted error is 0.600
split: dim 1, thresh 2.10, thresh ineqal: gt, the weighted error is 0.400





({&apos;com_oper&apos;: &apos;lt&apos;, &apos;dim&apos;: 0, &apos;thre&apos;: 1.3000000000000003},
 matrix([[ 0.2]]),
 array([[-1.],
        [ 1.],
        [-1.],
        [-1.],
        [ 1.]]))
</code></pre><figure class="highlight python"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div><div class="line">24</div><div class="line">25</div><div class="line">26</div><div class="line">27</div><div class="line">28</div><div class="line">29</div><div class="line">30</div><div class="line">31</div><div class="line">32</div><div class="line">33</div><div class="line">34</div></pre></td><td class="code"><pre><div class="line"><span class="function"><span class="keyword">def</span> <span class="title">build_stump</span><span class="params">(X, y, w)</span>:</span></div><div class="line">    <span class="string">""" 输入为权重向量 w ，返回的则是利用 w 而得到的具有最小错误率的单层决策树 """</span></div><div class="line">    x_mat = np.mat(X)</div><div class="line">    y_mat = np.mat(y).T</div><div class="line">    m, n = X.shape</div><div class="line">    n_steps = <span class="number">10</span></div><div class="line">    beststump = &#123;&#125;</div><div class="line">    best_hypo = np.mat(np.zeros((m, <span class="number">1</span>)))</div><div class="line">    min_error = np.inf</div><div class="line">    <span class="comment"># 遍历所有维度的特征</span></div><div class="line">    <span class="keyword">for</span> i <span class="keyword">in</span> range(n):</div><div class="line">        <span class="comment"># 遍历特征的取值范围，遍历范围应该大于特征的取值范围</span></div><div class="line">        range_min = x_mat[:, i].min()</div><div class="line">        range_max = x_mat[:, i].max()</div><div class="line">        step_size = (range_max - range_min) / n_steps</div><div class="line">        thre = range_min - <span class="number">2</span> * step_size</div><div class="line">        <span class="keyword">for</span> j <span class="keyword">in</span> range(<span class="number">-1</span>, int(n_steps) + <span class="number">1</span>):</div><div class="line">            thre += step_size</div><div class="line">            <span class="comment"># 切换取值范围</span></div><div class="line">            <span class="keyword">for</span> cmp_oper <span class="keyword">in</span> [<span class="string">'lt'</span>, <span class="string">'gt'</span>]:</div><div class="line">                hypo = stump_classify(X, i, thre, cmp_oper)</div><div class="line">                err_arr = np.mat(np.ones((m, <span class="number">1</span>)))</div><div class="line">                err_arr[hypo == y_mat] = <span class="number">0</span></div><div class="line">                weighted_error = w.T * err_arr</div><div class="line">                <span class="comment"># print("split: dim %d, thresh %.2f, thresh ineqal: %s, the weighted error is %.3f"\</span></div><div class="line">                <span class="comment">#  % (i, thre, cmp_oper, weighted_error))</span></div><div class="line">                <span class="keyword">if</span> weighted_error &lt; min_error:</div><div class="line">                    min_error = weighted_error</div><div class="line">                    best_hypo = hypo.copy()</div><div class="line">                    beststump[<span class="string">'dim'</span>] = i</div><div class="line">                    beststump[<span class="string">'thre'</span>] = thre</div><div class="line">                    beststump[<span class="string">'com_oper'</span>] = cmp_oper</div><div class="line"></div><div class="line">    <span class="keyword">return</span> beststump, min_error, best_hypo</div></pre></td></tr></table></figure>
<figure class="highlight python"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div><div class="line">24</div><div class="line">25</div><div class="line">26</div><div class="line">27</div></pre></td><td class="code"><pre><div class="line"><span class="function"><span class="keyword">def</span> <span class="title">ada_boost_train_ds</span><span class="params">(X, y, max_cycles=<span class="number">40</span>)</span>:</span></div><div class="line">    weak_cler = []</div><div class="line">    m = X.shape[<span class="number">0</span>]</div><div class="line">    w = np.mat(np.ones((m, <span class="number">1</span>)) / m)</div><div class="line">    agg_error = np.mat(np.zeros((m, <span class="number">1</span>)))</div><div class="line">    <span class="keyword">for</span> i <span class="keyword">in</span> range(max_cycles):</div><div class="line">        beststump, error, hypo = build_stump(X, y, w)</div><div class="line">        print(<span class="string">'w'</span>, w.T)</div><div class="line">        <span class="comment"># 该值会告诉总分类器本次单层决策树输出结果的权重，其中的语句 max(error, 1e-16) 用于确保在没有错误时不会发生除零溢出。</span></div><div class="line">        alpha = float(<span class="number">0.5</span> * np.log((<span class="number">1.0</span>-error) / max(error, <span class="number">1e-16</span>)))</div><div class="line">        <span class="comment"># 该词典包括了分类所需要的所有信息</span></div><div class="line">        beststump[<span class="string">'alpha'</span>] = alpha</div><div class="line">        weak_cler.append(beststump)</div><div class="line">        print(<span class="string">'hypo'</span>, hypo.T)</div><div class="line">        <span class="comment">#  为下一次迭代计算 *D*</span></div><div class="line">        expon = np.multiply(<span class="number">-1</span> * alpha * np.mat(y).T, hypo)</div><div class="line">        w = np.multiply(w, np.exp(expon))</div><div class="line">        w = w / w.sum()</div><div class="line">        <span class="comment"># 错误率累加计算</span></div><div class="line">        agg_error += alpha * hypo</div><div class="line">        print(<span class="string">'agg_error:'</span>, agg_error.T)</div><div class="line">        agg_errors = np.multiply(np.sign(agg_error) != np.mat(y).T, np.ones((m, <span class="number">1</span>)))</div><div class="line">        error_rate = agg_errors.sum() / m</div><div class="line">        print(<span class="string">'total error:'</span>, error_rate)</div><div class="line">        <span class="keyword">if</span> error_rate == <span class="number">0.0</span>:</div><div class="line">            <span class="keyword">break</span></div><div class="line">    <span class="keyword">return</span> weak_cler</div></pre></td></tr></table></figure>
<figure class="highlight python"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line">ada_boost_train_ds(X, y, <span class="number">9</span>)</div></pre></td></tr></table></figure>
<pre><code>w [[ 0.2  0.2  0.2  0.2  0.2]]
hypo [[-1.  1. -1. -1.  1.]]
agg_error: [[-0.69314718  0.69314718 -0.69314718 -0.69314718  0.69314718]]
total error: 0.2
w [[ 0.5    0.125  0.125  0.125  0.125]]
hypo [[ 1.  1. -1. -1. -1.]]
agg_error: [[ 0.27980789  1.66610226 -1.66610226 -1.66610226 -0.27980789]]
total error: 0.2
w [[ 0.28571429  0.07142857  0.07142857  0.07142857  0.5       ]]
hypo [[ 1.  1.  1.  1.  1.]]
agg_error: [[ 1.17568763  2.56198199 -0.77022252 -0.77022252  0.61607184]]
total error: 0.0





[{&apos;alpha&apos;: 0.6931471805599453,
  &apos;com_oper&apos;: &apos;lt&apos;,
  &apos;dim&apos;: 0,
  &apos;thre&apos;: 1.3000000000000003},
 {&apos;alpha&apos;: 0.9729550745276565, &apos;com_oper&apos;: &apos;lt&apos;, &apos;dim&apos;: 1, &apos;thre&apos;: 1.0},
 {&apos;alpha&apos;: 0.8958797346140273,
  &apos;com_oper&apos;: &apos;lt&apos;,
  &apos;dim&apos;: 0,
  &apos;thre&apos;: 0.90000000000000002}]
</code></pre>
      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/tags/AdaBoost/" rel="tag"># AdaBoost</a>
          
        </div>
      

      
      
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/2017/08/09/matplotlib-note/" rel="next" title="Matplotlib 使用笔记">
                <i class="fa fa-chevron-left"></i> Matplotlib 使用笔记
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/2017/08/27/Regular-Expression/" rel="prev" title="Regular Expression">
                Regular Expression <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </div>
  
  
  
  </article>



    <div class="post-spread">
      
    </div>
  </div>


          </div>
          


          
  <div class="comments" id="comments">
    
  </div>


        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      
        <ul class="sidebar-nav motion-element">
          <li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap" >
            文章目录
          </li>
          <li class="sidebar-nav-overview" data-target="site-overview">
            站点概览
          </li>
        </ul>
      

      <section class="site-overview sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
          <img class="site-author-image" itemprop="image"
               src="/uploads/avatar.jpg"
               alt="东木金" />
          <p class="site-author-name" itemprop="name">东木金</p>
           
              <p class="site-description motion-element" itemprop="description">正在学习机器学习，希望能变得很强！</p>
          
        </div>
        <nav class="site-state motion-element">

          
            <div class="site-state-item site-state-posts">
              <a href="/archives/">
                <span class="site-state-item-count">162</span>
                <span class="site-state-item-name">日志</span>
              </a>
            </div>
          

          
            
            
            <div class="site-state-item site-state-categories">
              <a href="/categories/index.html">
                <span class="site-state-item-count">18</span>
                <span class="site-state-item-name">分类</span>
              </a>
            </div>
          

          
            
            
            <div class="site-state-item site-state-tags">
              <a href="/tags/index.html">
                <span class="site-state-item-count">42</span>
                <span class="site-state-item-name">标签</span>
              </a>
            </div>
          

        </nav>

        

        <div class="links-of-author motion-element">
          
            
              <span class="links-of-author-item">
                <a href="https://github.com/bdmk" target="_blank" title="GitHub">
                  
                    <i class="fa fa-fw fa-github"></i>
                  
                    
                      GitHub
                    
                </a>
              </span>
            
              <span class="links-of-author-item">
                <a href="mailto:catcherchan94@outlook.com" target="_blank" title="E-Mail">
                  
                    <i class="fa fa-fw fa-envelope"></i>
                  
                    
                      E-Mail
                    
                </a>
              </span>
            
          
        </div>

        
        

        
        

        


      </section>

      
      <!--noindex-->
        <section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
          <div class="post-toc">

            
              
            

            
              <div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-2"><a class="nav-link" href="#代码实现及测试"><span class="nav-number">1.</span> <span class="nav-text">代码实现及测试</span></a></li></ol></div>
            

          </div>
        </section>
      <!--/noindex-->
      

      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright" >
  
  &copy;  2017 - 
  <span itemprop="copyrightYear">2018</span>
  <span class="with-love">
    <i class="fa fa-heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">东木金</span>
</div>


<div class="powered-by">
  由 <a class="theme-link" href="https://hexo.io">Hexo</a> 强力驱动
</div>

<div class="theme-info">
  主题 -
  <a class="theme-link" href="https://github.com/iissnan/hexo-theme-next">
    NexT.Gemini
  </a>
</div>


        

        
      </div>
    </footer>

    
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

  </div>

  

<script type="text/javascript">
  if (Object.prototype.toString.call(window.Promise) !== '[object Function]') {
    window.Promise = null;
  }
</script>









  












  
  <script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script>

  
  <script type="text/javascript" src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>

  
  <script type="text/javascript" src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>

  
  <script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script>

  
  <script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>

  
  <script type="text/javascript" src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>


  


  <script type="text/javascript" src="/js/src/utils.js?v=5.1.2"></script>

  <script type="text/javascript" src="/js/src/motion.js?v=5.1.2"></script>



  
  


  <script type="text/javascript" src="/js/src/affix.js?v=5.1.2"></script>

  <script type="text/javascript" src="/js/src/schemes/pisces.js?v=5.1.2"></script>



  
  <script type="text/javascript" src="/js/src/scrollspy.js?v=5.1.2"></script>
<script type="text/javascript" src="/js/src/post-details.js?v=5.1.2"></script>



  


  <script type="text/javascript" src="/js/src/bootstrap.js?v=5.1.2"></script>



  


  




	





  





  






  

  <script type="text/javascript">
    // Popup Window;
    var isfetched = false;
    var isXml = true;
    // Search DB path;
    var search_path = "search.xml";
    if (search_path.length === 0) {
      search_path = "search.xml";
    } else if (/json$/i.test(search_path)) {
      isXml = false;
    }
    var path = "/" + search_path;
    // monitor main search box;

    var onPopupClose = function (e) {
      $('.popup').hide();
      $('#local-search-input').val('');
      $('.search-result-list').remove();
      $('#no-result').remove();
      $(".local-search-pop-overlay").remove();
      $('body').css('overflow', '');
    }

    function proceedsearch() {
      $("body")
        .append('<div class="search-popup-overlay local-search-pop-overlay"></div>')
        .css('overflow', 'hidden');
      $('.search-popup-overlay').click(onPopupClose);
      $('.popup').toggle();
      var $localSearchInput = $('#local-search-input');
      $localSearchInput.attr("autocapitalize", "none");
      $localSearchInput.attr("autocorrect", "off");
      $localSearchInput.focus();
    }

    // search function;
    var searchFunc = function(path, search_id, content_id) {
      'use strict';

      // start loading animation
      $("body")
        .append('<div class="search-popup-overlay local-search-pop-overlay">' +
          '<div id="search-loading-icon">' +
          '<i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>' +
          '</div>' +
          '</div>')
        .css('overflow', 'hidden');
      $("#search-loading-icon").css('margin', '20% auto 0 auto').css('text-align', 'center');

      $.ajax({
        url: path,
        dataType: isXml ? "xml" : "json",
        async: true,
        success: function(res) {
          // get the contents from search data
          isfetched = true;
          $('.popup').detach().appendTo('.header-inner');
          var datas = isXml ? $("entry", res).map(function() {
            return {
              title: $("title", this).text(),
              content: $("content",this).text(),
              url: $("url" , this).text()
            };
          }).get() : res;
          var input = document.getElementById(search_id);
          var resultContent = document.getElementById(content_id);
          var inputEventFunction = function() {
            var searchText = input.value.trim().toLowerCase();
            var keywords = searchText.split(/[\s\-]+/);
            if (keywords.length > 1) {
              keywords.push(searchText);
            }
            var resultItems = [];
            if (searchText.length > 0) {
              // perform local searching
              datas.forEach(function(data) {
                var isMatch = false;
                var hitCount = 0;
                var searchTextCount = 0;
                var title = data.title.trim();
                var titleInLowerCase = title.toLowerCase();
                var content = data.content.trim().replace(/<[^>]+>/g,"");
                var contentInLowerCase = content.toLowerCase();
                var articleUrl = decodeURIComponent(data.url);
                var indexOfTitle = [];
                var indexOfContent = [];
                // only match articles with not empty titles
                if(title != '') {
                  keywords.forEach(function(keyword) {
                    function getIndexByWord(word, text, caseSensitive) {
                      var wordLen = word.length;
                      if (wordLen === 0) {
                        return [];
                      }
                      var startPosition = 0, position = [], index = [];
                      if (!caseSensitive) {
                        text = text.toLowerCase();
                        word = word.toLowerCase();
                      }
                      while ((position = text.indexOf(word, startPosition)) > -1) {
                        index.push({position: position, word: word});
                        startPosition = position + wordLen;
                      }
                      return index;
                    }

                    indexOfTitle = indexOfTitle.concat(getIndexByWord(keyword, titleInLowerCase, false));
                    indexOfContent = indexOfContent.concat(getIndexByWord(keyword, contentInLowerCase, false));
                  });
                  if (indexOfTitle.length > 0 || indexOfContent.length > 0) {
                    isMatch = true;
                    hitCount = indexOfTitle.length + indexOfContent.length;
                  }
                }

                // show search results

                if (isMatch) {
                  // sort index by position of keyword

                  [indexOfTitle, indexOfContent].forEach(function (index) {
                    index.sort(function (itemLeft, itemRight) {
                      if (itemRight.position !== itemLeft.position) {
                        return itemRight.position - itemLeft.position;
                      } else {
                        return itemLeft.word.length - itemRight.word.length;
                      }
                    });
                  });

                  // merge hits into slices

                  function mergeIntoSlice(text, start, end, index) {
                    var item = index[index.length - 1];
                    var position = item.position;
                    var word = item.word;
                    var hits = [];
                    var searchTextCountInSlice = 0;
                    while (position + word.length <= end && index.length != 0) {
                      if (word === searchText) {
                        searchTextCountInSlice++;
                      }
                      hits.push({position: position, length: word.length});
                      var wordEnd = position + word.length;

                      // move to next position of hit

                      index.pop();
                      while (index.length != 0) {
                        item = index[index.length - 1];
                        position = item.position;
                        word = item.word;
                        if (wordEnd > position) {
                          index.pop();
                        } else {
                          break;
                        }
                      }
                    }
                    searchTextCount += searchTextCountInSlice;
                    return {
                      hits: hits,
                      start: start,
                      end: end,
                      searchTextCount: searchTextCountInSlice
                    };
                  }

                  var slicesOfTitle = [];
                  if (indexOfTitle.length != 0) {
                    slicesOfTitle.push(mergeIntoSlice(title, 0, title.length, indexOfTitle));
                  }

                  var slicesOfContent = [];
                  while (indexOfContent.length != 0) {
                    var item = indexOfContent[indexOfContent.length - 1];
                    var position = item.position;
                    var word = item.word;
                    // cut out 100 characters
                    var start = position - 20;
                    var end = position + 80;
                    if(start < 0){
                      start = 0;
                    }
                    if (end < position + word.length) {
                      end = position + word.length;
                    }
                    if(end > content.length){
                      end = content.length;
                    }
                    slicesOfContent.push(mergeIntoSlice(content, start, end, indexOfContent));
                  }

                  // sort slices in content by search text's count and hits' count

                  slicesOfContent.sort(function (sliceLeft, sliceRight) {
                    if (sliceLeft.searchTextCount !== sliceRight.searchTextCount) {
                      return sliceRight.searchTextCount - sliceLeft.searchTextCount;
                    } else if (sliceLeft.hits.length !== sliceRight.hits.length) {
                      return sliceRight.hits.length - sliceLeft.hits.length;
                    } else {
                      return sliceLeft.start - sliceRight.start;
                    }
                  });

                  // select top N slices in content

                  var upperBound = parseInt('1');
                  if (upperBound >= 0) {
                    slicesOfContent = slicesOfContent.slice(0, upperBound);
                  }

                  // highlight title and content

                  function highlightKeyword(text, slice) {
                    var result = '';
                    var prevEnd = slice.start;
                    slice.hits.forEach(function (hit) {
                      result += text.substring(prevEnd, hit.position);
                      var end = hit.position + hit.length;
                      result += '<b class="search-keyword">' + text.substring(hit.position, end) + '</b>';
                      prevEnd = end;
                    });
                    result += text.substring(prevEnd, slice.end);
                    return result;
                  }

                  var resultItem = '';

                  if (slicesOfTitle.length != 0) {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + highlightKeyword(title, slicesOfTitle[0]) + "</a>";
                  } else {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + title + "</a>";
                  }

                  slicesOfContent.forEach(function (slice) {
                    resultItem += "<a href='" + articleUrl + "'>" +
                      "<p class=\"search-result\">" + highlightKeyword(content, slice) +
                      "...</p>" + "</a>";
                  });

                  resultItem += "</li>";
                  resultItems.push({
                    item: resultItem,
                    searchTextCount: searchTextCount,
                    hitCount: hitCount,
                    id: resultItems.length
                  });
                }
              })
            };
            if (keywords.length === 1 && keywords[0] === "") {
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-search fa-5x" /></div>'
            } else if (resultItems.length === 0) {
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-frown-o fa-5x" /></div>'
            } else {
              resultItems.sort(function (resultLeft, resultRight) {
                if (resultLeft.searchTextCount !== resultRight.searchTextCount) {
                  return resultRight.searchTextCount - resultLeft.searchTextCount;
                } else if (resultLeft.hitCount !== resultRight.hitCount) {
                  return resultRight.hitCount - resultLeft.hitCount;
                } else {
                  return resultRight.id - resultLeft.id;
                }
              });
              var searchResultList = '<ul class=\"search-result-list\">';
              resultItems.forEach(function (result) {
                searchResultList += result.item;
              })
              searchResultList += "</ul>";
              resultContent.innerHTML = searchResultList;
            }
          }

          if ('auto' === 'manual') {
            input.addEventListener('input', inputEventFunction);
          } else {
            $('.search-icon').click(inputEventFunction);
            input.addEventListener('keypress', function (event) {
              if (event.keyCode === 13) {
                inputEventFunction();
              }
            });
          }

          // remove loading animation
          $(".local-search-pop-overlay").remove();
          $('body').css('overflow', '');

          proceedsearch();
        }
      });
    }

    // handle and trigger popup window;
    $('.popup-trigger').click(function(e) {
      e.stopPropagation();
      if (isfetched === false) {
        searchFunc(path, 'local-search-input', 'local-search-result');
      } else {
        proceedsearch();
      };
    });

    $('.popup-btn-close').click(onPopupClose);
    $('.popup').click(function(e){
      e.stopPropagation();
    });
    $(document).on('keyup', function (event) {
      var shouldDismissSearchPopup = event.which === 27 &&
        $('.search-popup').is(':visible');
      if (shouldDismissSearchPopup) {
        onPopupClose();
      }
    });
  </script>





  

  

  

  
  


  

  

</body>
</html>
